from exchange_dialogues_functions import *
# Select which actor's 3D landmarks to load from the dataset.
config = MovieTranslationConfig()
language = 'telugu'
actor = 'Mahesh_Babu'

# The number of entries in the actor's landmarks directory decides how many
# videos are read below (presumably one landmarks file per video -- confirm).
landmarks_dir = os.path.join(config.MOVIE_TRANSLATION_DATASET_DIR, 'landmarks', language, actor)
landmarks_files = sorted(glob.glob(os.path.join(landmarks_dir, '*')))
num_of_landmark_files = len(landmarks_files)

# Collect one array per video and concatenate once at the end: stacking inside
# the loop re-copies everything gathered so far on every iteration, and it
# raises when a video contributes no usable rows (an empty list becomes a
# (0,)-shaped array that cannot be stacked onto (0, 68, 3)).
# The empty seed array keeps the result shaped (0, 68, 3) when nothing is found.
landmark_chunks = [np.empty((0, 68, 3))]
for i in tqdm.tqdm(range(num_of_landmark_files)):
    video_3D_landmarks_full = utils.read_landmarks(language, actor, i, '3D')
    # Keep only rows with exactly 69 entries and drop the first entry of each
    # (presumably a frame identifier followed by 68 landmarks -- confirm).
    video_3D_landmarks = [lm_full[1:] for lm_full in video_3D_landmarks_full if len(lm_full) == 69]
    if video_3D_landmarks:
        landmark_chunks.append(np.array(video_3D_landmarks))
Mahesh_Babu_3D_landmarks = np.concatenate(landmark_chunks, axis=0)
Mahesh_Babu_3D_landmarks.shape
I choose Mahesh_Babu_0060_frame_040.png as the reference frame; the other poses will be aligned to it.
# Load the reference frame (video 60, frame 40) and its 3D landmarks.
ref_frame_path = os.path.join(config.MOVIE_TRANSLATION_DATASET_DIR, 'frames', 'telugu', 'Mahesh_Babu', 'Mahesh_Babu_0060', 'Mahesh_Babu_0060_frame_040.png')
ref_frame_bgr = cv2.imread(ref_frame_path)
# cv2.imread signals a missing/unreadable file by returning None instead of
# raising; fail here with the path rather than inside cvtColor.
if ref_frame_bgr is None:
    raise IOError("Could not read reference frame: {}".format(ref_frame_path))
# OpenCV loads images as BGR; convert to RGB for plotting.
ref_frame = cv2.cvtColor(ref_frame_bgr, cv2.COLOR_BGR2RGB)
# Entry 40 of video 60's landmarks, with its first element dropped.
ref_3D_landmarks = np.array(utils.read_landmarks('telugu', 'Mahesh_Babu', 60, '3D')[40][1:])
utils.plot_3D_landmarks(ref_frame, ref_3D_landmarks)
For example, consider the landmarks at index 282 of Mahesh Babu's stacked landmark array:
# Overlay the untransformed landmarks stored at index 282 on the reference frame.
landmarks_at_282 = Mahesh_Babu_3D_landmarks[282]
utils.plot_3D_landmarks(ref_frame, landmarks_at_282)
I estimate the affine transform (Tx) required to map the pose at index 282 onto the pose of the reference frame, and then apply that transform.
src_3D_landmarks = Mahesh_Babu_3D_landmarks[282]
# Fit the 3D affine transform on the first 36 landmarks only
# (presumably the non-eye, non-mouth points of the 68-point layout -- confirm).
retval, Rt_to_dst_from_src, _ = cv2.estimateAffine3D(src_3D_landmarks[:36], ref_3D_landmarks[:36])
# Make homogeneous coordinates: append a column of ones to the 68 source points
src_homogeneous_coords = np.column_stack((src_3D_landmarks, np.ones(68)))
# Apply the transform to every landmark and cast the result to int
target_lip_landmarks_tx_from_source = Rt_to_dst_from_src.dot(src_homogeneous_coords.T).T.astype('int')
utils.plot_3D_landmarks(ref_frame, target_lip_landmarks_tx_from_source)
In the same way, transform all the 3D landmarks to the reference pose.
def affine_3D_tx(source_3D_landmarks, target_3D_landmarks):
    """Map source landmarks into the target pose with an estimated 3D affine transform.

    The transform is estimated with ``cv2.estimateAffine3D`` from the first 36
    landmarks of each set only, and is then applied to every source landmark.

    Args:
        source_3D_landmarks: array of shape (N, 3) with N >= 36 (68 in this
            notebook); the points to transform.
        target_3D_landmarks: array with at least 36 rows of 3D points; its
            first 36 rows define the pose to match.

    Returns:
        Array of shape (N, 3): the transformed source landmarks cast with
        ``astype('int')`` (the fractional part is dropped, not rounded).

    Raises:
        ValueError: if OpenCV reports failure or returns no transform.
    """
    retval, Rt_to_dst_from_src, _ = cv2.estimateAffine3D(source_3D_landmarks[:36], target_3D_landmarks[:36])
    # Without this check a failed estimation surfaces later as an obscure
    # error (or garbage) inside the matrix product.
    if not retval or Rt_to_dst_from_src is None:
        raise ValueError("cv2.estimateAffine3D could not estimate a transform")
    # Homogeneous coordinates: one extra column of ones per source point, so
    # the 3x4 matrix applies its translation as well; sized from the input
    # rather than hard-coded to 68.
    homogeneous_source = np.hstack((source_3D_landmarks, np.ones((len(source_3D_landmarks), 1))))
    return np.dot(Rt_to_dst_from_src, homogeneous_source.T).T.astype('int')
# One-off step, kept commented out: transform every landmark set to the
# reference pose and cache the arrays in an .npz file that is loaded below.
# # Tx all landmarks
# Mahesh_Babu_3D_landmarks_tx_to_ref = np.array([affine_3D_tx(lm, ref_3D_landmarks) for lm in Mahesh_Babu_3D_landmarks])
# np.savez('Mahesh_Babu_3D_landmarks_and_tx_to_ref',
# Mahesh_Babu_3D_landmarks=Mahesh_Babu_3D_landmarks,
# ref_3D_landmarks=ref_3D_landmarks,
# Mahesh_Babu_3D_landmarks_tx_to_ref=Mahesh_Babu_3D_landmarks_tx_to_ref)

# Load all Tx landmarks.
# np.load on an .npz returns an archive object that keeps the file open; the
# with-block closes it once the three arrays have been read into memory.
with np.load('Mahesh_Babu_3D_landmarks_and_tx_to_ref.npz') as Mahesh_Babu_3D_landmarks_and_tx_to_ref:
    Mahesh_Babu_3D_landmarks = Mahesh_Babu_3D_landmarks_and_tx_to_ref['Mahesh_Babu_3D_landmarks']
    ref_3D_landmarks = Mahesh_Babu_3D_landmarks_and_tx_to_ref['ref_3D_landmarks']
    Mahesh_Babu_3D_landmarks_tx_to_ref = Mahesh_Babu_3D_landmarks_and_tx_to_ref['Mahesh_Babu_3D_landmarks_tx_to_ref']
Now that all the landmarks have been transformed to the same pose, let us cluster the lip landmarks.
# Reshape so that each row holds all 68 landmarks of one frame, flattened as
# [x0, y0, z0, x1, y1, z1, ...] (68 * 3 = 204 columns).
Mahesh_Babu_3D_landmarks_tx_to_ref_reshaped = np.reshape(Mahesh_Babu_3D_landmarks_tx_to_ref, (len(Mahesh_Babu_3D_landmarks_tx_to_ref), -1))
Mahesh_Babu_3D_landmarks_tx_to_ref_reshaped.shape
# Choose 1000 random rows to display (np.random.choice samples with
# replacement by default, so a row may appear more than once).
display_num = 1000
random_choice = np.random.choice(len(Mahesh_Babu_3D_landmarks_tx_to_ref_reshaped), display_num)
# Landmarks 48..67 (the ones plotted as the mouth elsewhere in this notebook)
# sit in flattened columns 48*3 .. 68*3 - 1. Slicing columns 48:68 would pick
# the coordinates of landmarks 16..22 instead.
random_lip_landmarks = Mahesh_Babu_3D_landmarks_tx_to_ref_reshaped[random_choice][:, 48 * 3:68 * 3]
from sklearn.manifold import TSNE
# Embed the sampled lip coordinates in 2D for visual inspection.
random_lip_landmarks_tsne = TSNE(n_components=2, verbose=1).fit_transform(random_lip_landmarks)
# plt.plot(random_landmarks)
# 1000 points
plt.scatter(random_lip_landmarks_tsne[:, 0], random_lip_landmarks_tsne[:, 1], s=3)
Spectral clustering reference: http://scikit-learn.org/stable/modules/clustering.html#spectral-clustering
import sklearn.cluster
spectral_cluster_params = {
    'n_clusters' : 18,
    'eigen_solver' : None,
    'affinity' : 'nearest_neighbors',
    'n_neighbors' : 10,
    'assign_labels' : 'discretize'
}
# Every key in the dict is a SpectralClustering keyword argument.
spectral = sklearn.cluster.SpectralClustering(**spectral_cluster_params)
# Fit on only the mouth: landmarks 48..67 occupy flattened columns
# 48*3 .. 68*3 - 1 of each 204-value row (columns 48:68 would instead be the
# coordinates of landmarks 16..22).
spectral.fit(Mahesh_Babu_3D_landmarks_tx_to_ref_reshaped[:, 48 * 3:68 * 3])
# Save cluster
# sklearn.externals.joblib no longer exists in current scikit-learn; prefer
# the standalone joblib package and fall back to the bundled copy only on old
# installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
joblib.dump(spectral, 'spectral_cluster.pkl', compress=3)
# The np.int alias has been removed from NumPy; the builtin int is equivalent.
labels = spectral.labels_.astype(int)
unique_labels = np.unique(labels)
print(unique_labels)
# Cluster centres: row k is the mean full-face landmark vector of label k.
# Iterating over every cluster id (not over len(unique_labels)) keeps row
# index == label even if some id received no samples; such a row is NaN.
cluster_centers = np.array([
    Mahesh_Babu_3D_landmarks_tx_to_ref_reshaped[labels == i].mean(axis=0)
    for i in range(spectral_cluster_params['n_clusters'])
])
np.save('cluster_centers', cluster_centers)
cluster_centers = np.load('cluster_centers.npy')
# Plot tSNE clusters: colour the sampled points by label and mark the mean
# t-SNE position of each label in red.
random_labels = labels[random_choice]
random_lip_cluster_centers = np.array([
    random_lip_landmarks_tsne[random_labels == i].mean(axis=0)
    for i in range(spectral_cluster_params['n_clusters'])
])
plt.scatter(random_lip_landmarks_tsne[:, 0], random_lip_landmarks_tsne[:, 1], s=3, c=random_labels)
plt.scatter(random_lip_cluster_centers[:, 0], random_lip_cluster_centers[:, 1], s=15, c='r')
def base_3D_plot(ax, landmarks, x_scale=1.2):
    """Draw a 68-point 3D landmark set on a 3D axes as points plus outlines.

    Args:
        ax: a matplotlib 3D axes (must provide scatter, plot3D, view_init,
            get_xlim and set_xlim).
        landmarks: array indexable as ``landmarks[a:b, k]`` with x, y, z in
            columns 0, 1, 2; 68 rows are expected.
        x_scale: factor applied to the x coordinates before plotting.
            Defaults to 1.2, the value previously hard-coded (presumably a
            display aspect correction -- confirm).

    Returns:
        None. The axes is modified in place.
    """
    # Row ranges drawn as separate polylines (presumably jaw, the two brows,
    # nose bridge, nostrils, the two eyes, outer lips and inner lips of the
    # standard 68-point layout -- confirm).
    segments = (
        (0, 17), (17, 22), (22, 27), (27, 31), (31, 36),
        (36, 42), (42, 48), (48, 60), (60, None),
    )
    ax.scatter(landmarks[:, 0] * x_scale, landmarks[:, 1], landmarks[:, 2],
               c="cyan", alpha=1.0, edgecolor='b')
    for start, stop in segments:
        part = landmarks[start:stop]
        ax.plot3D(part[:, 0] * x_scale, part[:, 1], part[:, 2], color='blue')
    # View along the z axis (elev=90) and mirror the x axis.
    ax.view_init(elev=90., azim=90.)
    ax.set_xlim(ax.get_xlim()[::-1])
# Importing Axes3D registers the '3d' projection on older matplotlib versions.
from mpl_toolkits.mplot3d import Axes3D

# Size the loops and the subplot grid from the data instead of a hard-coded 18,
# so changing n_clusters cannot cause an IndexError or an overfull grid.
# With 18 centres this is the same 3 x 6 layout as before.
num_cluster_centers = len(cluster_centers)
grid_cols = 6
grid_rows = (num_cluster_centers + grid_cols - 1) // grid_cols

# Mouth-only version of each centre: landmarks 0..47 are collapsed onto
# landmark 48 so that only the mouth (48..67) has any extent when drawn.
mouth_only_centers = []
for i in range(num_cluster_centers):
    center_landmarks = cluster_centers[i].reshape(68, 3)
    mouth_only_centers.append(np.vstack((np.ones((48, 3)) * center_landmarks[48], center_landmarks[48:68])))

# 1) Mouth shape of every cluster centre as a grid of 3D plots.
fig = plt.figure(figsize=(15, 5))
for i in range(num_cluster_centers):
    ax = fig.add_subplot(grid_rows, grid_cols, i+1, projection='3d')
    base_3D_plot(ax, mouth_only_centers[i])
# 2) Mouth of every cluster centre drawn on the reference frame.
for i in range(num_cluster_centers):
    utils.plot_3D_landmarks(ref_frame, mouth_only_centers[i])
# 3) Full-face landmarks of every cluster centre drawn on the reference frame.
for i in range(num_cluster_centers):
    utils.plot_3D_landmarks(ref_frame, cluster_centers[i].reshape(68, 3))
Now consider the landmarks of a smaller face, and check whether the affine transform still works.
# Build a half-size copy of landmark set 100 by shrinking it about its centroid
src_3D_landmarks = Mahesh_Babu_3D_landmarks[100]
src_3D_landmarks_mean = src_3D_landmarks.mean(axis=0)
src_3D_landmarks_smaller = src_3D_landmarks_mean + (src_3D_landmarks - src_3D_landmarks_mean)/2
utils.plot_3D_landmarks(ref_frame, src_3D_landmarks_smaller)
# Estimate the 3D affine transform from the first 36 landmarks of each set
retval, Rt_to_dst_from_src, _ = cv2.estimateAffine3D(src_3D_landmarks_smaller[:36], ref_3D_landmarks[:36])
# Make homogeneous coordinates: append a column of ones to the 68 shrunken points
src_homogeneous_coords = np.column_stack((src_3D_landmarks_smaller, np.ones(68)))
# Apply the transform to every landmark and cast the result to int
target_lip_landmarks_tx_from_source = Rt_to_dst_from_src.dot(src_homogeneous_coords.T).T.astype('int')
utils.plot_3D_landmarks(ref_frame, target_lip_landmarks_tx_from_source)